{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from random import choice"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Restaurant:\n",
    "    def __init__(self, mu, dev):\n",
    "        self.mu = mu\n",
    "        self.dev = dev\n",
    "    def sample(self):\n",
    "        return np.random.normal(self.mu, self.dev)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "def explore_only(candidates, num_days):\n",
    "    scores = []\n",
    "    for _ in range(num_days):\n",
    "        scores.append(choice(candidates).sample())\n",
    "    return sum(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "def exploit_only(candidates, num_days):\n",
    "    scores = [c.sample() for c in candidates]\n",
    "    chosen = candidates[np.argmax(scores)]\n",
    "    for _ in range(num_days - len(candidates)):\n",
    "        scores.append(chosen.sample())\n",
    "    return sum(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "def epsilon_greedy(candidates, num_days, epsilon=0.05):\n",
    "    scores = []\n",
    "    history = {idx: [c.sample()] for idx,c in enumerate(candidates)}\n",
    "    for _ in range(num_days - len(candidates)):\n",
    "        p = np.random.random()\n",
    "        #explore\n",
    "        if p < epsilon:\n",
    "            chosen = choice(candidates)\n",
    "        #exploit\n",
    "        else:\n",
    "            chosen = candidates[sorted(history.items(), key=lambda pair: np.mean(pair[1]))[-1][0]]\n",
    "        score = chosen.sample()\n",
    "        scores.append(score)\n",
    "        history[candidates.index(chosen)].append(score)\n",
    "    return sum(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ucb1(candidates, num_days):\n",
    "    scores = []\n",
    "    history = {idx: [c.sample()] for idx,c in enumerate(candidates)}\n",
    "    for t in range(len(candidates), num_days):\n",
    "        mu_plus_ucb = [np.mean(history[idx]) + np.sqrt(2*np.log(t) / len(history[idx])) for idx in range(len(candidates))]\n",
    "        chosen = candidates[np.argmax(mu_plus_ucb)]\n",
    "        \n",
    "        score = chosen.sample()\n",
    "        scores.append(score)\n",
    "        history[candidates.index(chosen)].append(score)\n",
    "    return sum(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [],
   "source": [
    "dev_factor = 0.5\n",
    "num_restaurants = 3\n",
    "\n",
    "mu_vals = [3*i for i in range(1,num_restaurants+1)]\n",
    "dev_vals = [mu*dev_factor for mu in mu_vals]\n",
    "mu_dev_pairs = zip(mu_vals, dev_vals)\n",
    "\n",
    "candidates = [Restaurant(mu,dev) for mu,dev in mu_dev_pairs]\n",
    "\n",
    "num_days = 300\n",
    "\n",
    "optimal_average = max(mu_vals)*num_days"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Explore Only Mean Regret: 0.33400345242040025\n"
     ]
    }
   ],
   "source": [
    "explore_only_vals = []\n",
    "for _ in range(1000):\n",
    "    val = explore_only(candidates, num_days)\n",
    "    explore_only_vals.append(val)\n",
    "print('Explore Only Mean Regret: %s'%((optimal_average - np.mean(explore_only_vals)) / optimal_average))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Exploit Only Mean Regret: 0.10974979914722435\n"
     ]
    }
   ],
   "source": [
    "exploit_only_vals = []\n",
    "for _ in range(1000):\n",
    "    val = exploit_only(candidates, num_days)\n",
    "    exploit_only_vals.append(val)\n",
    "print('Exploit Only Mean Regret: %s'%((optimal_average - np.mean(exploit_only_vals)) / optimal_average))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epsilon Greedy Mean Regret (10%): 0.061901290618584424\n"
     ]
    }
   ],
   "source": [
    "epsilon_greedy_vals = []\n",
    "for _ in range(1000):\n",
    "    val = epsilon_greedy(candidates, num_days, 0.1)\n",
    "    epsilon_greedy_vals.append(val)\n",
    "print('Epsilon Greedy Mean Regret (10%%): %s'%((optimal_average - np.mean(epsilon_greedy_vals)) / optimal_average))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "UCB1 Mean Regret: 0.05807450789812113\n"
     ]
    }
   ],
   "source": [
    "ucb1_vals = []\n",
    "for _ in range(1000):\n",
    "    val = ucb1(candidates, num_days)\n",
    "    ucb1_vals.append(val)\n",
    "print('UCB1 Mean Regret: %s'%((optimal_average - np.mean(ucb1_vals)) / optimal_average))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
